#####################################################################
# Application
# Substantive interpretation and counterfactuals: EU Legislature
#####################################################################
# set working directory to current file location
# setwd()

# source replication
source("replication_Rasmussen/replicate_Rasmussen.R")


load("replication_Rasmussen/cbq_binary_q1.RData")
dat1 <- summary(cbq_fit)
dat1 <- dat1$summary
load("replication_Rasmussen/cbq_binary_q2.RData") 
dat2 <- summary(cbq_fit)
dat2 <- dat2$summary
load("replication_Rasmussen/cbq_binary_q3.RData") 
dat3 <- summary(cbq_fit)
dat3 <- dat3$summary
load("replication_Rasmussen/cbq_binary_q4.RData")
dat4 <- summary(cbq_fit)
dat4 <- dat4$summary
load("replication_Rasmussen/cbq_binary_q5.RData")
dat5 <- summary(cbq_fit)
dat5 <- dat5$summary
load("replication_Rasmussen/cbq_binary_q6.RData") 
dat6 <- summary(cbq_fit)
dat6 <- dat6$summary
load("replication_Rasmussen/cbq_binary_q7.RData") 
dat7 <- summary(cbq_fit)
dat7 <- dat7$summary
load("replication_Rasmussen/cbq_binary_q8.RData") 
dat8 <- summary(cbq_fit)
dat8 <- dat8$summary
load("replication_Rasmussen/cbq_binary_q9.RData") 
dat9 <- summary(cbq_fit)
dat9 <- dat9$summary


coef_mean <- cbind(dat1[,1],
                  dat2[,1],
                  dat3[,1],
                  dat4[,1],
                  dat5[,1],
                  dat6[,1],
                  dat7[,1],
                  dat8[,1],
                  dat9[,1])
coef_25 <- cbind(dat1[,4],
                dat2[,4],
                dat3[,4],
                dat4[,4],
                dat5[,4],
                dat6[,4],
                dat7[,4],
                dat8[,4],
                dat9[,4])
coef_975 <- cbind(dat1[,8],
                dat2[,8],
                dat3[,8],
                dat4[,8],
                dat5[,8],
                dat6[,8],
                dat7[,8],
                dat8[,8],
                dat9[,8])

coef_mean <- coef_mean[-14,]
coef_25 <- coef_25[-14,]
coef_975 <- coef_975[-14,]
q_sd <- apply(coef_mean,1,sd)


########################################
# plot coefficient across quantiles
########################################
# var_names <- c("Policy distance rapporteur - EP median",
#               "(Policy distance rapporteur - EP median)*Big party group")
var_names = c("Policy distance rapporteur - EP median",
              "(Policy dist. rapp. - EP median)*Big party group")

pdf("figures/figure3_1.pdf",width = 6,height = 5)
plot(NA,xlim=c(0,1),ylim=c(min(coef_25[3,]),max(coef_975[3,])),
     xlab="Quantiles",
     ylab="Coefficients"
     ,main = var_names[2],
     bty="l")
lines(x=(1:9)/10,y=coef_mean[3,],lwd=2)
lines(x=(1:9)/10,y=coef_25[3,],lty="dashed",lwd=2)
lines(x=(1:9)/10,y=coef_975[3,],lty="dashed",lwd=2)
abline(h=0,lty="dotted",lwd=2,col="gray")
legend("bottomleft",legend = c("Mean","95% credible interval"),
       lty=c("solid","dashed"),
       lwd=c(2,2),
       bty="n")
dev.off()

var_names = c("Constant",
              "Policy distance rapporteur - EP median",
              "(Policy distance rapporteur - EP median)*Big party group",
              "Big party group",
              "Country coherence of key negotiators",
              "Closeness of working relationship (calendar year)",
              "Existing legislation expires",
              "No existing legislation",
              "Workload (pending legislation during first reading)",
              "Issue areas consulted (no. of consultive committees)",
              "New act",
              "Directive",
              "Inter-institutional disagreement"
              )

pdf("figures/figure3_appendix.pdf",width=11,height = 14)
par(mfrow=c(5,3),
    cex.lab=1.5)
plot(NA,xlim=c(0,10),ylim=c(min(coef_25[1,]),max(coef_975[1,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[1])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[1,])
lines(x=1:9,y=coef_25[1,],lty="dashed")
lines(x=1:9,y=coef_975[1,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[2,]),max(coef_975[2,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[2])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[2,])
lines(x=1:9,y=coef_25[2,],lty="dashed")
lines(x=1:9,y=coef_975[2,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[3,]),max(coef_975[3,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[3])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[3,])
lines(x=1:9,y=coef_25[3,],lty="dashed")
lines(x=1:9,y=coef_975[3,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[4,]),max(coef_975[4,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[4])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[4,])
lines(x=1:9,y=coef_25[4,],lty="dashed")
lines(x=1:9,y=coef_975[4,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[5,]),max(coef_975[5,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[5])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[5,])
lines(x=1:9,y=coef_25[5,],lty="dashed")
lines(x=1:9,y=coef_975[5,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[6,]),max(coef_975[6,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[6])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[6,])
lines(x=1:9,y=coef_25[6,],lty="dashed")
lines(x=1:9,y=coef_975[6,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[7,]),max(coef_975[7,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[7])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[7,])
lines(x=1:9,y=coef_25[7,],lty="dashed")
lines(x=1:9,y=coef_975[7,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[8,]),max(coef_975[8,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[8])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[8,])
lines(x=1:9,y=coef_25[8,],lty="dashed")
lines(x=1:9,y=coef_975[8,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[9,]),max(coef_975[9,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[9])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[9,])
lines(x=1:9,y=coef_25[9,],lty="dashed")
lines(x=1:9,y=coef_975[9,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[10,]),max(coef_975[10,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[10])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[10,])
lines(x=1:9,y=coef_25[10,],lty="dashed")
lines(x=1:9,y=coef_975[10,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[11,]),max(coef_975[11,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[11])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[11,])
lines(x=1:9,y=coef_25[11,],lty="dashed")
lines(x=1:9,y=coef_975[11,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[12,]),max(coef_975[12,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[12])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[12,])
lines(x=1:9,y=coef_25[12,],lty="dashed")
lines(x=1:9,y=coef_975[12,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(min(coef_25[13,]),max(coef_975[13,])),
     axes=F,
     xlab="Quantiles",
     ylab=""
     ,main = var_names[13])
axis(1,at=1:9,labels=paste0("0.",1:9))
axis(2)
lines(x=1:9,y=coef_mean[13,])
lines(x=1:9,y=coef_25[13,],lty="dashed")
lines(x=1:9,y=coef_975[13,],lty="dashed")
abline(h=0,lty="dotted",lwd=0.5,col="gray")

plot(NA,xlim=c(0,10),ylim=c(0,10),
     axes=F,
     xlab="",
     ylab="")

legend("topleft",legend=c("Posterior Mean","95% Credible Interval"),
       lty=c("solid","dashed"),
       bty="n",
       cex=1.4)

dev.off()

##############################################################################
# read in data
##############################################################################
# Covariates: 
# 
# Policy distance rapporteur-EP median
# Big party group
# Country coherence of key negotiators
# Closeness of working relationship (calendar year)
# Urgency of legislation: existing legislation expires; No existing legislation
# Workload (pending legislation during first reading)
# Issue areas consulted (no. of consultative committees)
# New act
# Directive
# Inter-institutional disagreement

# recode
dat_r <- read.dta13("replication_Rasmussen/EUP388675-supplementary_material.dta")
dat_r$var15 <- as.numeric(as.character(dat_r$var15))
dat_r$var5 <- as.integer(dat_r$var5)
dat_r$var5[which(dat_r$var5==1)] <- 0
dat_r$var5[which(dat_r$var5==2)] <- 1
dat_r$var16 <- as.integer(dat_r$var16)
dat_r$var16[which(dat_r$var16==1)] <- 0
dat_r$var16[which(dat_r$var16==2)] <- 1
dat_r$var10 <- as.integer(dat_r$var10)
dat_r$var10 <- dat_r$var10 - 1 
dat_r$var15 <- dat_r$var15 - 1998
dat_r$var_int <- dat_r$var5 * dat_r$var6
dat_r$urgcy_yes <- ifelse(dat_r$var1=="Yes",1,0)
dat_r$urgcy_noleg <- ifelse(dat_r$var1=="No existing leg",1,0)
dat_r$var14 <- as.integer(dat_r$var14) - 1 
dat_r$var7 <- as.integer(dat_r$var7) - 1
dat_r$var17 <- as.integer(dat_r$var17) - 1
dat_r1 <- subset(dat_r,dat_r$var13==0)
dat_r1 <- subset(dat_r1,select = c(var16,var6,var3, var5,var10,
                                  var15,urgcy_yes,urgcy_noleg,
                                  var4,var11,var14,
                                  var7,var17))
dat_r1 <- dat_r1[complete.cases(dat_r1),]
dat_r1$choice <- as.integer(dat_r1$var16)
Y <- dat_r1$choice
X <- as.matrix(subset(dat_r1,select = c(var6,var3, var5,var10,
                                       var15,urgcy_yes,urgcy_noleg,
                                       var4,var11,var14,
                                       var7,var17)))

##########################################
# Prediction accuracy
##########################################
coefs <- coef_mean
X <- cbind(rep(1,385),X)
xb <- X %*% coefs

pred_prob1 <- pald(xb[,1],0.1)
pred_prob2 <- pald(xb[,2],0.2)
pred_prob3 <- pald(xb[,3],0.3)
pred_prob4 <- pald(xb[,4],0.4)
pred_prob5 <- pald(xb[,5],0.5)
pred_prob6 <- pald(xb[,6],0.6)
pred_prob7 <- pald(xb[,7],0.7)
pred_prob8 <- pald(xb[,8],0.8)
pred_prob9 <- pald(xb[,9],0.9)

pred_q1 <- ifelse(pred_prob1>0.5,1,0)
pred_q2 <- ifelse(pred_prob2>0.5,1,0)
pred_q3 <- ifelse(pred_prob3>0.5,1,0)
pred_q4 <- ifelse(pred_prob4>0.5,1,0)
pred_q5 <- ifelse(pred_prob5>0.5,1,0)
pred_q6 <- ifelse(pred_prob6>0.5,1,0)
pred_q7 <- ifelse(pred_prob7>0.5,1,0)
pred_q8 <- ifelse(pred_prob8>0.5,1,0)
pred_q9 <- ifelse(pred_prob9>0.5,1,0)

pred_r1 <- sum(pred_q1==dat_r1$var16)/385 # 75.1%
pred_r2 <- sum(pred_q2==dat_r1$var16)/385 # 75.8%
pred_r3 <- sum(pred_q3==dat_r1$var16)/385 # 76.6%
pred_r4 <- sum(pred_q4==dat_r1$var16)/385 # 77.4%
pred_r5 <- sum(pred_q5==dat_r1$var16)/385 # 76.4%
pred_r6 <- sum(pred_q6==dat_r1$var16)/385 # 77.7%
pred_r7 <- sum(pred_q7==dat_r1$var16)/385 # 77.4%
pred_r8 <- sum(pred_q8==dat_r1$var16)/385 # 75.8%
pred_r9 <- sum(pred_q9==dat_r1$var16)/385 # 76.6%


##############################################################
# counterfactual dataset
dat_r2 <- with(dat_r1, data.frame(var6 = seq(0,2,length.out = 1000), 
                                  var3=seq(0,2,length.out = 1000), 
                                  var5=1,
                                  var10=median(var10), 
                                  var15=median(var15), 
                                  urgcy_yes=1,
                                  urgcy_noleg=0,
                                  var4=median(var4,na.rm=T), 
                                  var11=median(var11, na.rm=T), 
                                  var14=1, 
                                  var7=1, 
                                  var17=1
                                  )
)

dat_r2 <- cbind(rep(1,1000),dat_r2)

####################################
# CBQ prediction
X <- as.matrix(dat_r2)
coefs <- coef_mean
xb <- X %*% coefs

pred_prob1 <- pald(xb[,1],0.1)
pred_prob2 <- pald(xb[,2],0.2)
pred_prob3 <- pald(xb[,3],0.3)
pred_prob4 <- pald(xb[,4],0.4)
pred_prob5 <- pald(xb[,5],0.5)
pred_prob6 <- pald(xb[,6],0.6)
pred_prob7 <- pald(xb[,7],0.7)
pred_prob8 <- pald(xb[,8],0.8)
pred_prob9 <- pald(xb[,9],0.9)

pdf("figures/figure3_2.pdf",width = 6,height = 5)
plot(NA, xlim = c(0,2),
     ylim=c(-0.1,1),
     xlab = "Distance between rapporteur and EP median",
     ylab = "Pr(First-reading conclusion)",
     bty="l",
     main="Comparison between predicted probabilities")
lines(seq(0,2,length.out = 1000),pred_prob1,col="gray10",lwd=2)
lines(seq(0,2,length.out = 1000),pred_prob2,col="gray",lwd=2) #
lines(seq(0,2,length.out = 1000),pred_prob3,col="gray",lwd=2) #
lines(seq(0,2,length.out = 1000),pred_prob4,col="gray",lwd=2)
lines(seq(0,2,length.out = 1000),pred_prob5,col="gray",lwd=2)
lines(seq(0,2,length.out = 1000),pred_prob6,col="gray",lwd=2) #
lines(seq(0,2,length.out = 1000),pred_prob7,col="gray",lwd=2) #
lines(seq(0,2,length.out = 1000),pred_prob8,col="gray",lwd=2) #
lines(seq(0,2,length.out = 1000),pred_prob9,col="gray10",lwd=2) #
lines(seq(0,2,length.out =  1000),predm,col="black",lwd=2,lty="dashed")
lines(c(1.2,1.1),c(0.35,0.3))
lines(c(0.15320869, 0.08499242),c(0.4293161, 0.3445462))
text(1.2,0.36,"CBQ-Q1")
text(0.103,0.311,"CBQ-Q9")
legend("topright",bty="n",legend = c("Ordinary logit","CBQ (Q1-Q9)"),
       col=c("black","gray40"),lty=c("dashed","solid"),
       lwd=c(2,2))
dev.off()



